#here::set_here()
suppressPackageStartupMessages({
  library(slingshot)
  library(SingleCellExperiment)
  library(cowplot)
  library(rgl)
  library(clusterExperiment)
  library(RColorBrewer)
  library(ggplot2)
  library(pheatmap)
  library(wesanderson)
  library(gridExtra)
  library(irlba)
  library(tidyverse)
  library(Signac)
  library(Seurat)
  library(patchwork)
})

Import lineage-traced data

sds <- readRDS("../data/finalTrajectory/sling.rds")
counts <- readRDS("../data/finalTrajectory/counts_noResp_noMV.rds")
# counts <- round(counts)
sce <- readRDS("../data/finalTrajectory/sce_tradeSeq20200904.rds")
load("../data/ALL_TF.Rda")
clDatta <- readRDS("../data/finalTrajectory/dattaCl_noResp_noMV.rds")
timePoint <- readRDS("../data/finalTrajectory/timePoint_noResp_noMV.rds")

Select activated and renewed HBC clusters

I will select

  • activated HBCs as cells sampled in the 24h timepoint that are in the starting cluster
  • renewed (‘resting’) HBCs as cells sampled in the 14D timepoint that are in the HBC cluster
cols <- c("#E41A1C", "#377EB8", "#4DAF4A", "#984EA3", "#FF7F00", "#FFFF33", "#A65628", "#F781BF", "#999999")
view3d( theta = 10, phi = 10)
# Datta labels
plot3d(reducedDim(sds), aspect = 'iso', col=cols[clDatta], alpha=.6, xlab="UMAP1", ylab="UMAP2", zlab="UMAP3", box=FALSE, top=TRUE)
plot3d(sds, add=TRUE, lwd=3, col="black")

# timepoints
plot3d(reducedDim(sds), aspect = 'iso', col=cols[timePoint], alpha=.6, xlab="UMAP1", ylab="UMAP2", zlab="UMAP3", box=FALSE, top=TRUE)
plot3d(sds, add=TRUE, lwd=3, col="black")

# cluster labels used for TI
clSds <- apply(slingClusterLabels(sds), 1, which.max)
plot3d(reducedDim(sds), aspect = 'iso', col=cols[clSds], alpha=.6, xlab="UMAP1", ylab="UMAP2", zlab="UMAP3", box=FALSE, top=TRUE)
plot3d(sds, add=TRUE, lwd=3, col="black")


actHBCId <- which(clSds == 8 & timePoint == "24HPI")
restHBCId <- which(clDatta == "HBC" & timePoint == "14DPI")
grpHBC <- rep(NA, length(c(actHBCId, restHBCId)))
grpHBC[1:length(actHBCId)] <- "activated"
grpHBC[is.na(grpHBC)] <- "resting"
grpHBC <- factor(grpHBC)
countHBC <- counts[, c(actHBCId, restHBCId)]

Processing and dimensionality reduction of each dataset separately

In the plots below, the left-hand panel corresponds to (scone-normalized) counts. The right panel corresponds to log-transformed and scaled (to a sequencing depth of 10K) counts.

set.seed(1234)

# scRNA-seq import of lineage-traced data
rna <- CreateSeuratObject(counts=countHBC)
rna$tech <- '10x'
rna$celltype <- grpHBC
rna <- FindVariableFeatures(rna, selection.method = "vst", nfeatures = 2000)
rna <- ScaleData(rna)
## Centering and scaling data matrix
rna <- RunPCA(rna, npcs=20)
## PC_ 1 
## Positive:  Slc6a6, Rps27, Junb, Aqp3, Btg2, Fos, Fxyd3, Apoe, Krt5, Jun 
##     Gclm, Socs3, Ier2, Gpm6a, Ubc, Ctsl, Cyr61, Ifitm3, Laptm4a, Atf3 
##     Gstm2, Csrp1, Ptn, Pltp, Fosb, Dusp1, Igfbp2, Klf4, Krt14, Gas6 
## Negative:  Tmsb10, Cystm1, Atf5, Plaur, Adam8, Krt6a, Map1b, Tuba1a, Tubb2b, Cst6 
##     Ptma, Ecm1, Stmn1, Emp3, Uchl1, Upk3bl, Ceacam1, Clcf1, Tm4sf1, Gprc5a 
##     Cldn7, Tubb6, Lypd3, Cldn3, Lamc2, Phlda2, Ncam1, Cldn4, Mal, Sult2b1 
## PC_ 2 
## Positive:  Igfbp2, Cald1, Tmsb10, Ogn, Tmsb4x, Igfbp4, Adam8, Krt5, Apoe, Plaur 
##     Fam101b, Tspan6, Abca4, Sparc, Tm4sf1, Col23a1, Lamc2, Krt6a, Clcf1, Tmem108 
##     Slc29a1, Krt14, Tubb6, Ngf, Cst6, Oaf, Tubb2b, Ecm1, Hmga2, Car12 
## Negative:  Prdx6, Adh7, Gsta3, Sftpd, Cyp2f2, Ly6a, Tst, Slc16a11, Ifitm1, Reg3g 
##     Muc1, Por, Cbr2, Adh1, Lrrc26, Selenbp1, Mgst1, Gpx2, Sult1d1, Gstp1 
##     Cbr3, Cd14, Cyp4b1, Cxcl17, Gsto1, Serpinb11, Elf3, Ly6c1, Serpina3n, Epas1 
## PC_ 3 
## Positive:  Adh7, Aldh3a1, Selenbp1, Adh1, Ly6a, Oat, Defb1, Fmo2, Cbr3, Gstp1 
##     Tmem176a, Atp1b1, Tmem176b, Efemp1, Igfbp4, Upk1b, Sparc, Ly6c1, Tgm2, Igfbp2 
##     Mgp, Kcnj16, Sult1d1, Gmpr, Cdo1, Krt15, Nrtn, Cyp4b1, Muc1, Bsg 
## Negative:  Cldn4, X1810011O10Rik, Klf4, Sfn, Zfp36, S100a10, Mast4, Atf3, Tagln2, Fosb 
##     Avpi1, Arc, Nr4a1, Cldn7, Nfat5, Egr1, Klf6, Cdkn1a, Phlda1, Lmna 
##     Crym, Neat1, Anxa1, Krt18, S100a6, Acsm4, Lypd3, Papss2, Ier2, Krt23 
## PC_ 4 
## Positive:  mt.Co1, mt.Co3, mt.Atp6, Hpgd, Pla2r1, X1500009L16Rik, Acsm4, Rps27rt, Car12, Dapl1 
##     Myo6, Fth1, Papss2, Lypd2, Insl6, Cyb5r3, Scd2, Gas6, Trp63, Nmb 
##     Eya1, Piezo2, Limd2, Crym, Sema5a, Serpinb1b, Ftl1, X4631405K08Rik, Fndc1, Scgb1c1 
## Negative:  Actg1, Anxa3, Ier3, Nfkbia, F3, Tmem176b, Krt7, Sfn, Cyr61, Cldn4 
##     Krt14, Tacstd2, Tuba1c, Tmem176a, Fos, Bhlhe40, Junb, Tnfrsf12a, Gmpr, Cish 
##     Fst, Cxcl16, Plaur, Krt17, Ubc, Trib1, Oat, Atf3, Adam8, Dusp5 
## PC_ 5 
## Positive:  Fosb, Egr1, Neat1, Irs2, Pdcd4, Nr4a1, Lmna, Klf9, Cbx3, Dst 
##     Ccdc3, Mast4, Sdc4, Timp3, Nfia, Kdm6b, Birc5, Maff, Spry2, Il6st 
##     Stmn1, Hist1h2ap, Nfat5, Sik1, Arid5a, Gm26532, Pbk, Gclc, Tpm1, Fam101b 
## Negative:  Aldh2, Gstm1, Prdx1, Aqp3, Ces1d, Tst, Gsn, Clu, Ephx1, Cpe 
##     Ifitm3, Ubc, Krt18, Prdx6, Hspb1, Galm, Fmo6, Hspa5, Ppa1, Krt5 
##     Krt8, Pltp, Mgst1, Ctsl, Cyp2a5, Alas1, Cryl1, Calml4, Gstm2, Actg1
rna <- RunUMAP(rna, dims = 1:20, min_dist=.1)
## Warning: The following arguments are not used: min_dist
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## 10:54:49 UMAP embedding parameters a = 0.9922 b = 1.112
## 10:54:49 Read 3064 rows and found 20 numeric columns
## 10:54:49 Using Annoy for neighbor search, n_neighbors = 30
## 10:54:49 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 10:54:49 Writing NN index file to temp file /var/folders/15/gs2v829x4gxcdv0tlgk_5l4m0000gn/T//Rtmpk0Xbh6/file133c6617506a
## 10:54:49 Searching Annoy index using 1 thread, search_k = 3000
## 10:54:50 Annoy recall = 100%
## 10:54:50 Commencing smooth kNN distance calibration using 1 thread
## 10:54:51 Initializing from normalized Laplacian + noise
## 10:54:52 Commencing optimization for 500 epochs, with 122146 positive edges
## 10:54:56 Optimization finished
DimPlot(rna, reduction = "umap", group.by="celltype", label=TRUE) + 
  NoLegend() +
  ggtitle("Lineage-traced data")
## Warning: Using `as.character()` on a quosure is deprecated as of rlang 0.3.0.
## Please use `as_label()` or `as_name()` instead.
## This warning is displayed once per session.

# Markers for activated/resting HBC in scRNA-seq
rnaNorm = NormalizeData(rna, normalization.method = "LogNormalize")
plot_grid(FeaturePlot(rna, features = "Krtdap"),
          FeaturePlot(rnaNorm, features = "Krtdap"))

plot_grid(FeaturePlot(rna, features = "Krt6a"),
          FeaturePlot(rnaNorm, features = "Krt6a"))

plot_grid(FeaturePlot(rna, features = "Krt5"),
          FeaturePlot(rnaNorm, features = "Krt5"))

plot_grid(FeaturePlot(rna, features = "Krt14"),
          FeaturePlot(rnaNorm, features = "Krt14"))

plot_grid(FeaturePlot(rna, features = "Trp63"),
          FeaturePlot(rnaNorm, features = "Trp63"))

plot_grid(FeaturePlot(rna, features = "Reg3g"), #respirartory (Brann)
          FeaturePlot(rnaNorm, features = "Reg3g"))

plot_grid(FeaturePlot(rna, features = "Foxj1"), #resp ciliated (Brann)
          FeaturePlot(rnaNorm, features = "Foxj1"))

plot_grid(FeaturePlot(rna, features = "Muc5ac"), #resp secretory (Brann)
          FeaturePlot(rnaNorm, features = "Muc5ac")) 

plot_grid(FeaturePlot(rna, features = "Muc5b"), #respiratory (Boying)
          FeaturePlot(rnaNorm, features = "Muc5b"))

plot_grid(FeaturePlot(rna, features = "Cbr2"), #resp basal cell (Boying)
          FeaturePlot(rnaNorm, features = "Cbr2"))

plot_grid(FeaturePlot(rna, features = "Sult1c1"), #resp basal and resp epithelial (Boying)
          FeaturePlot(rnaNorm, features = "Sult1c1"))

# scRNA-seq import of whole OE data
# Import whole OE data
sceWOE <- readRDS("../data/wholeOE/sceWOE.rds")
# scRNA-seq import of lineage-traced data
rnaWOE <- CreateSeuratObject(counts=as.matrix(assays(sceWOE)$counts))
rnaWOE$tech <- '10x'
rnaWOE$celltype <- colData(sceWOE)$seurat_annotated
rnaWOE <- FindVariableFeatures(rnaWOE, selection.method = "vst", nfeatures = 2000)
rnaWOE <- ScaleData(rnaWOE)
## Centering and scaling data matrix
rnaWOE <- RunPCA(rnaWOE, npcs=20)
## PC_ 1 
## Positive:  Hpgd, Aox2, Abca4, Nrcam, Car12, Azgp1, Abi3bp, Kcnj13, Gpr155, Apoc1 
##     Dcn, Gpha2, Ccdc153, 1500009L16Rik, Timp3, Sostdc1, Insl6, Itpkb, Irf1, 1200007C13Rik 
##     Cldn11, Hsd17b6, Defb1, Acsm4, Adh1, H2-Eb1, Cd36, Hist2h4, Fmo3, Gal3st2c 
## Negative:  Ran, Hsp90ab1, Ranbp1, Eif5a, Hnrnpf, Npm1, Pfn1, Serbp1, Set, Nme1 
##     Eif4a1, Ybx1, Anp32b, Rps2, Nhp2, Hspd1, Cct2, Ncl, Snrpd1, Ube2m 
##     Cct8, Prmt1, Hspe1, Hnrnpab, Ppp1r14b, Tubb5, Cct3, Rpsa, Cct5, Pa2g4 
## PC_ 2 
## Positive:  Igfbp2, Nrcam, Basp1, Tinagl1, Car12, Hpgd, Krt5, Krt14, Abca4, Axl 
##     G0s2, Ogn, Slc1a3, Thbs1, Ass1, Cald1, Fgfbp1, Col18a1, Krt17, Nrg1 
##     Igfbp4, Fscn1, Itga3, Azgp1, Bin1, 1500009L16Rik, Palld, Pxdc1, Ddah1, Tubb6 
## Negative:  Tspan1, Serpinb11, Bpifa1, Muc4, Reg3g, Fetub, Pon1, Tst, Gabrp, Tspan13 
##     Lypd2, Slc16a11, Lrrc26, Tmc5, Slc31a1, Cmtm8, Clic6, Slc44a4, Cyp2j6, Mlph 
##     Cxcl17, Xbp1, Sec11c, Myo5c, Fam174b, 5330417C22Rik, Muc16, Vtcn1, Pdzk1ip1, Tff2 
## PC_ 3 
## Positive:  Stmn1, Spc24, Tk1, Cenph, Pbk, Clspn, Smc2, Cenpw, Cenpm, Asf1b 
##     Smc4, Ndc80, Ccdc34, Pclaf, Atad2, Hmgb2, Tacc3, Cenpk, Tyms, Cdk1 
##     Anln, Birc5, Rad51, Rrm2, H2afz, Aurkb, Rrm1, Melk, Rad51ap1, Bub1 
## Negative:  Cldn4, Pttg1ip, Plaur, Adam8, Lamc2, Pls3, Fst, Rbms1, St14, Ngf 
##     Cdh1, Capn2, Ifi202b, Plin2, Irf6, Zfp593, Nabp1, Palld, Rtn4, Tnfrsf12a 
##     Msn, Sfn, Rnf19b, Hbegf, Clcf1, Tubb6, Fosl1, Prrg4, Taldo1, Glrx 
## PC_ 4 
## Positive:  Tgm2, Adh7, 4833423E24Rik, Gpx2, Abi3bp, Gsto1, Ly6a, Adh1, Ly6d, Anxa8 
##     Serpinb5, Mgp, Defb1, Cd14, Cfh, Fmo3, Pax9, Pir, Gm20186, Cdc14a 
##     Cdh13, Slc39a4, Col25a1, Capg, Pcolce, Sat1, Gdpd2, Rbp1, Irx3, Gda 
## Negative:  Nrcam, Basp1, Aox2, Car12, Hpgd, Azgp1, Igfbp2, Ermn, 1500009L16Rik, Scgb1c1 
##     Mdk, Abca4, G0s2, Insl6, Nt5dc2, Calml4, Galm, Pdlim3, Tmem108, Ogn 
##     Plscr2, Nectin3, Hsd17b6, Kcnj13, Sostdc1, Gal3st2c, Pcdh17, Crabp2, Slc1a3, Mef2c 
## PC_ 5 
## Positive:  Anxa1, Lmo7, Nectin2, Cldn3, Ctsh, St14, Tubb2b, Cldn7, Rab11fip1, Ceacam1 
##     Crabp2, Cryab, Espn, Cldn6, Mal, Cdh1, Cxcl16, Blnk, Elf5, Cldn9 
##     Hebp2, Cldn4, Crb3, Tjp2, Gm14137, Akap12, Ndrg1, Elf3, Tacstd2, Metrnl 
## Negative:  Htra1, Nrg1, Ung, Slc16a1, Bpifa1, Glyat, Srm, Ano1, Mcm7, Sssca1 
##     Npm3, Tff2, Dctpp1, Pold2, Psmc3ip, Cotl1, Lypd2, Bpifb1, Aldh1a1, Pdzrn4 
##     Tipin, Axl, Ccbe1, Cdca7, Tcn2, Pon1, Reg3g, Nes, Sigmar1, Muc16
rnaWOE <- RunUMAP(rnaWOE, dims = 1:20, min_dist=.1)
## Warning: The following arguments are not used: min_dist
## 10:55:10 UMAP embedding parameters a = 0.9922 b = 1.112
## 10:55:10 Read 2954 rows and found 20 numeric columns
## 10:55:10 Using Annoy for neighbor search, n_neighbors = 30
## 10:55:10 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 10:55:10 Writing NN index file to temp file /var/folders/15/gs2v829x4gxcdv0tlgk_5l4m0000gn/T//Rtmpk0Xbh6/file133c4a09383
## 10:55:10 Searching Annoy index using 1 thread, search_k = 3000
## 10:55:11 Annoy recall = 100%
## 10:55:11 Commencing smooth kNN distance calibration using 1 thread
## 10:55:13 Initializing from normalized Laplacian + noise
## 10:55:13 Commencing optimization for 500 epochs, with 123922 positive edges
## 10:55:17 Optimization finished
DimPlot(rnaWOE, reduction = "umap", group.by="celltype", label=TRUE) + 
  NoLegend() +
  ggtitle("Whole OE data")

rnaWOENorm = NormalizeData(rnaWOE, normalization.method = "LogNormalize")
plot_grid(FeaturePlot(rnaWOE, features = "Krtdap"),
          FeaturePlot(rnaWOENorm, features = "Krtdap"))

plot_grid(FeaturePlot(rnaWOE, features = "Krt6a"),
          FeaturePlot(rnaWOENorm, features = "Krt6a"))

plot_grid(FeaturePlot(rnaWOE, features = "Krt5"),
          FeaturePlot(rnaWOENorm, features = "Krt5"))

plot_grid(FeaturePlot(rnaWOE, features = "Krt14"),
          FeaturePlot(rnaWOENorm, features = "Krt14"))

plot_grid(FeaturePlot(rnaWOE, features = "Trp63"),
          FeaturePlot(rnaWOENorm, features = "Trp63"))

plot_grid(FeaturePlot(rnaWOE, features = "Reg3g"), #respirartory (Brann)
          FeaturePlot(rnaWOENorm, features = "Reg3g"))

plot_grid(FeaturePlot(rnaWOE, features = "Foxj1"), #resp ciliated (Brann)
          FeaturePlot(rnaWOENorm, features = "Foxj1"))

plot_grid(FeaturePlot(rnaWOE, features = "Muc5ac"), #resp secretory (Brann)
          FeaturePlot(rnaWOENorm, features = "Muc5ac")) 

plot_grid(FeaturePlot(rnaWOE, features = "Muc5b"), #respiratory (Boying)
          FeaturePlot(rnaWOENorm, features = "Muc5b"))

plot_grid(FeaturePlot(rnaWOE, features = "Cbr2"), #resp basal cell (Boying)
          FeaturePlot(rnaWOENorm, features = "Cbr2"))

plot_grid(FeaturePlot(rnaWOE, features = "Sult1c1"), #resp basal and resp epithelial (Boying)
          FeaturePlot(rnaWOENorm, features = "Sult1c1"))

## select Krt5 negative respiratory basal cells for Divya
Seurat::Assays(rnaWOE)
## [1] "RNA"
seurCount <- Seurat::GetAssay(rnaWOE, "RNA")
krt5Count <- seurCount["Krt5",]
krt5NegRBC <- rnaWOE$celltype == "Respiratory basal cell" & krt5Count == 0
rnaWOE$krt5NegRBC <- krt5NegRBC[1,]
DimPlot(rnaWOE, group.by = "krt5NegRBC")

cellsToRemove <- Seurat::Cells(rnaWOE)[which(krt5NegRBC[1,])]
saveRDS(cellsToRemove, file="../data/cellsToRemove_respBC_ForDivya.rds")

Assign sub-hybrid labels to lineage-traced scRNA-seq data

ctHybridSub <- as.character(rna$celltype)
hlpid <- which(rna@reductions$umap@cell.embeddings[,1] > 2 & rna@reductions$umap@cell.embeddings[,2]>-1)
ctHybridSub[hlpid] <- NA
hlp2id <- which(rna@reductions$umap@cell.embeddings[,1] > 2 & rna@reductions$umap@cell.embeddings[,2]>-1 & rna@reductions$umap@cell.embeddings[,2]<4)
ctHybridSub[hlp2id] <- "hybrid1"
hlp1id <- which(rna@reductions$umap@cell.embeddings[,1] > 2 & rna@reductions$umap@cell.embeddings[,2]>4)
ctHybridSub[hlp1id] <- "hybrid2"
rna$ctHybridSub <- ctHybridSub

DimPlot(rna, group.by = "celltype", label = FALSE, repel = TRUE) +
  ggtitle("Lineage-traced scRNA-seq data") 

DimPlot(rna, group.by = "ctHybridSub", label = FALSE, repel = TRUE) +
  ggtitle("Lineage-traced scRNA-seq data") 

table(ctHybridSub)
## ctHybridSub
## activated   hybrid1   hybrid2   resting 
##       602       130       148      2184

Marker genes and overview of gene expression per cell state

# Marker genes for hybrid clusters
ctHybrid <- rna$ctHybridSub
ctHybrid[ctHybrid == "hybrid1" | ctHybrid == "hybrid2"] <- "hybrid"
Idents(rnaNorm) <- ctHybrid
rnaNormMarkers <- FindAllMarkers(rnaNorm, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25)
## Calculating cluster activated
## Calculating cluster hybrid
## Calculating cluster resting
rnaNormMarkers %>%
    group_by(cluster) %>%
    top_n(n = 10, wt = avg_logFC) -> top10rnaNorm
DoHeatmap(rnaNorm, features = top10rnaNorm$gene) + NoLegend()

# Marker genes for subhybrid clusters
Idents(rnaNorm) <- rna$ctHybridSub
rnaNormMarkers <- FindAllMarkers(rnaNorm, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25)
## Calculating cluster activated
## Calculating cluster hybrid1
## Calculating cluster hybrid2
## Calculating cluster resting
rnaNormMarkers %>%
    group_by(cluster) %>%
    top_n(n = 10, wt = avg_logFC) -> top10rnaNorm
DoHeatmap(rnaNorm, features = top10rnaNorm$gene) + NoLegend()

Integrate data

We will transfer the whole OE labels to the lineage-traced scRNA-seq data. This will allow us to check: - Whether regenerated HBCs are close to resting - Whether RNA hybrid cells are similar to respiratory cells (we hope not)

rna$origin <- "linTrace"
rnaWOE$origin <- "WOE"
seurlist <- list("linrna"=rna,
                 "woerna"=rnaWOE)
features <- SelectIntegrationFeatures(object.list = seurlist)
anchors <- FindIntegrationAnchors(object.list = seurlist, 
                                  anchor.features = features)
## Scaling features for provided objects
## Finding all pairwise anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
##  Found 7559 anchors
## Filtering anchors
##  Retained 1758 anchors
## Extracting within-dataset neighbors
integrated <- IntegrateData(anchorset = anchors)
## Merging dataset 2 into 1
## Extracting anchors for merged samples
## Finding integration vectors
## Finding integration vector weights
## Integrating data
## Warning: Adding a command log without an assay associated with it

Analyze integrated dataset

DefaultAssay(integrated) <- "integrated"

# Run the standard workflow for visualization and clustering
integrated <- ScaleData(integrated, verbose = FALSE)
integrated <- RunPCA(integrated, npcs = 30, verbose = FALSE)
integrated <- RunUMAP(integrated, reduction = "pca", dims = 1:30)
## 10:56:57 UMAP embedding parameters a = 0.9922 b = 1.112
## 10:56:57 Read 6018 rows and found 30 numeric columns
## 10:56:57 Using Annoy for neighbor search, n_neighbors = 30
## 10:56:57 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 10:56:58 Writing NN index file to temp file /var/folders/15/gs2v829x4gxcdv0tlgk_5l4m0000gn/T//Rtmpk0Xbh6/file133c2d87a7fe
## 10:56:58 Searching Annoy index using 1 thread, search_k = 3000
## 10:57:00 Annoy recall = 100%
## 10:57:01 Commencing smooth kNN distance calibration using 1 thread
## 10:57:02 Initializing from normalized Laplacian + noise
## 10:57:02 Commencing optimization for 500 epochs, with 252024 positive edges
## 10:57:12 Optimization finished
# integrated <- FindNeighbors(integrated, reduction = "pca", dims = 1:30)
# integrated <- FindClusters(integrated, resolution = 0.5)
# Visualization
DimPlot(integrated, reduction = "umap", group.by = "origin") + ggtitle("Integrated data")

DimPlot(integrated, reduction = "umap", group.by = "celltype") + ggtitle("Integrated data: cell type")

DimPlot(integrated, reduction = "umap", group.by = "ctHybridSub") + ggtitle("Integrated data: cell state of linage-traced scRNA-seq")

FeaturePlot(integrated, features = "Krtdap")

FeaturePlot(integrated, features = "Krt6a")

FeaturePlot(integrated, features = "Krt5")

FeaturePlot(integrated, features = "Krt14")

FeaturePlot(integrated, features = "Trp63")

FeaturePlot(integrated, features="Reg3g") #resp (Brann)

FeaturePlot(integrated, features="Foxj1") #resp ciliated (Brann)

FeaturePlot(integrated, features="Muc5ac") #resp secretory (Brann)

FeaturePlot(integrated, features="Cbr2") #resp basal cell (Boying)

FeaturePlot(integrated, features="Sult1c1") #resp basal and resp epithelial (Boying)

Session info

sessionInfo()
## R version 3.6.2 (2019-12-12)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS Mojave 10.14.5
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] parallel  stats4    stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] patchwork_1.0.0             Seurat_3.1.5               
##  [3] Signac_0.2.5                forcats_0.4.0              
##  [5] stringr_1.4.0               dplyr_1.0.3                
##  [7] purrr_0.3.3                 readr_1.3.1                
##  [9] tidyr_1.0.2                 tibble_3.1.6               
## [11] tidyverse_1.3.0             irlba_2.3.3                
## [13] Matrix_1.3-2                gridExtra_2.3              
## [15] wesanderson_0.3.6           pheatmap_1.0.12            
## [17] ggplot2_3.3.2               RColorBrewer_1.1-2         
## [19] clusterExperiment_2.6.1     bigmemory_4.5.36           
## [21] rgl_0.100.30                cowplot_1.0.0              
## [23] SingleCellExperiment_1.8.0  SummarizedExperiment_1.16.1
## [25] DelayedArray_0.12.2         BiocParallel_1.20.1        
## [27] matrixStats_0.56.0          Biobase_2.46.0             
## [29] GenomicRanges_1.38.0        GenomeInfoDb_1.22.0        
## [31] IRanges_2.20.2              S4Vectors_0.24.3           
## [33] BiocGenerics_0.32.0         slingshot_1.4.0            
## [35] princurve_2.1.4            
## 
## loaded via a namespace (and not attached):
##   [1] utf8_1.1.4              reticulate_1.14         tidyselect_1.1.0       
##   [4] RSQLite_2.2.0           AnnotationDbi_1.48.0    htmlwidgets_1.5.1      
##   [7] grid_3.6.2              Rtsne_0.15              RNeXML_2.4.2           
##  [10] munsell_0.5.0           ica_1.0-2               codetools_0.2-16       
##  [13] future_1.16.0           miniUI_0.1.1.1          withr_2.1.2            
##  [16] colorspace_1.4-1        knitr_1.28              uuid_0.1-2             
##  [19] zinbwave_1.11.6         rstudioapi_0.11         ROCR_1.0-7             
##  [22] listenv_0.8.0           NMF_0.21.0              labeling_0.3           
##  [25] GenomeInfoDbData_1.2.2  farver_2.0.3            bit64_0.9-7            
##  [28] rhdf5_2.30.1            vctrs_0.3.8             generics_0.0.2         
##  [31] xfun_0.26               ggseqlogo_0.1           R6_2.4.1               
##  [34] doParallel_1.0.15       rsvd_1.0.2              locfit_1.5-9.1         
##  [37] manipulateWidget_0.10.0 bitops_1.0-6            assertthat_0.2.1       
##  [40] promises_1.1.0          scales_1.1.0            gtable_0.3.0           
##  [43] phylobase_0.8.6         npsurv_0.4-0            globals_0.12.5         
##  [46] rlang_0.4.10            genefilter_1.68.0       gggenes_0.4.0          
##  [49] splines_3.6.2           lazyeval_0.2.2          broom_0.7.10           
##  [52] yaml_2.2.1              reshape2_1.4.3          modelr_0.1.5           
##  [55] crosstalk_1.0.0         backports_1.1.5         httpuv_1.5.2           
##  [58] tools_3.6.2             gridBase_0.4-7          ellipsis_0.3.2         
##  [61] gplots_3.0.1.2          jquerylib_0.1.3         ggridges_0.5.2         
##  [64] Rcpp_1.0.6              plyr_1.8.5              progress_1.2.2         
##  [67] zlibbioc_1.32.0         RCurl_1.98-1.1          prettyunits_1.1.1      
##  [70] pbapply_1.4-2           zoo_1.8-7               ggrepel_0.8.1          
##  [73] haven_2.2.0             cluster_2.1.0           fs_1.3.1               
##  [76] magrittr_1.5            data.table_1.12.8       RSpectra_0.16-0        
##  [79] lmtest_0.9-37           reprex_0.3.0            RANN_2.6.1             
##  [82] fitdistrplus_1.0-14     hms_0.5.3               lsei_1.2-0             
##  [85] mime_0.9                evaluate_0.14           xtable_1.8-4           
##  [88] XML_3.99-0.3            readxl_1.3.1            compiler_3.6.2         
##  [91] KernSmooth_2.23-16      crayon_1.3.4            htmltools_0.5.1.1      
##  [94] later_1.0.0             RcppParallel_4.4.4      lubridate_1.7.4        
##  [97] howmany_0.3-1           DBI_1.1.0               dbplyr_1.4.2           
## [100] MASS_7.3-51.4           ade4_1.7-13             cli_2.0.1              
## [103] gdata_2.18.0            igraph_1.2.4.2          pkgconfig_2.0.3        
## [106] bigmemory.sri_0.1.3     rncl_0.8.3              registry_0.5-1         
## [109] locfdr_1.1-8            plotly_4.9.1            xml2_1.3.2             
## [112] foreach_1.4.7           annotate_1.64.0         bslib_0.2.4            
## [115] rngtools_1.5            pkgmaker_0.31           webshot_0.5.2          
## [118] XVector_0.26.0          bibtex_0.4.2.2          rvest_0.3.5            
## [121] digest_0.6.27           tsne_0.1-3              sctransform_0.3.2      
## [124] RcppAnnoy_0.0.16        softImpute_1.4          Biostrings_2.54.0      
## [127] leiden_0.3.4            rmarkdown_2.11          cellranger_1.1.0       
## [130] uwot_0.1.5              edgeR_3.28.0            kernlab_0.9-29         
## [133] shiny_1.4.0             Rsamtools_2.2.1         gtools_3.8.1           
## [136] lifecycle_1.0.1         nlme_3.1-142            jsonlite_1.6.1         
## [139] Rhdf5lib_1.8.0          viridisLite_0.3.0       limma_3.42.1           
## [142] fansi_0.4.1             pillar_1.6.4            lattice_0.20-38        
## [145] fastmap_1.0.1           httr_1.4.1              survival_3.1-8         
## [148] glue_1.4.2              png_0.1-7               iterators_1.0.12       
## [151] bit_1.1-15.2            stringi_1.4.5           sass_0.3.1             
## [154] HDF5Array_1.14.1        ggfittext_0.9.0         blob_1.2.1             
## [157] caTools_1.18.0          memoise_1.1.0           future.apply_1.4.0     
## [160] ape_5.3